1. IMPORT
library(ggplot2)
library(cowplot)
library(tidyverse)
library(dplyr)
library(Seurat)
library(RColorBrewer)
library(SingleCellExperiment)
# Import seurat object
seur.human <- readRDS("~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/raw_data/human_data/filtered_seurat_harmony.11.22.22.RDS")
# Sanity check
print(seur.human) # 81,378 cells (it's the whole seurat object)
## An object of class Seurat
## 17212 features across 81378 samples within 1 assay
## Active assay: RNA (17212 features, 2000 variable features)
## 4 dimensional reductions calculated: pca, initial_umap, harmony, UMAP_50
# Quick visualization
DimPlot(seur.human, reduction="UMAP_50", group.by="new_clusters")+
scale_color_manual(values=colorRampPalette(brewer.pal(12,"Paired"))(18))

# Check if mitochondrial & ribosomal genes still in the count data
# rownames(seur.human)[stringr::str_detect(rownames(seur.human), "MT")]
# rownames(seur.human)[stringr::str_detect(rownames(seur.human), "RP")] # ribosomal genes still present
2. QUICK QC
# Look at qc measures per batch & donor
VlnPlot(seur.human, features="percent.mt", group.by="Batch", split.by="Donor") + labs(x="Batch", fill="Donor")

VlnPlot(seur.human, features="nFeature_RNA", group.by="Batch", split.by="Donor") + labs(x="Batch", fill="Donor")

VlnPlot(seur.human, features="nCount_RNA", group.by="Batch", split.by="Donor") + labs(x="Batch", fill="Donor")

# Look at qc measures per cell group
VlnPlot(seur.human, features="percent.mt", group.by="Batch", split.by="group.ident") + labs(x="Batch")

VlnPlot(seur.human, features="nFeature_RNA", group.by="Batch", split.by="group.ident") + labs(x="Batch")

VlnPlot(seur.human, features="nCount_RNA", group.by="Batch", split.by="group.ident") + labs(x="Batch")

# Look at qc measures per cluster
VlnPlot(seur.human, features="percent.mt", group.by="new_clusters") + labs(x="Clusters") + theme(legend.position="none")

VlnPlot(seur.human, features="nFeature_RNA", group.by="new_clusters") + labs(x="Clusters") + theme(legend.position="none")

VlnPlot(seur.human, features="nCount_RNA", group.by="new_clusters") + labs(x="Clusters") + theme(legend.position="none")

# Save plots
# ggsave("~/Downloads/qcplot1.jpeg", width=15, height=6)
# Check correlation between total count & MT content
FeatureScatter(seur.human, feature1 = "nFeature_RNA", feature2 = "percent.mt", group.by="Batch", pt.size=0.1, jitter=T)

FeatureScatter(seur.human, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="Batch", pt.size=0.1, jitter=T)

# Print one batch at a time
for(batch in LETTERS[1:9]){
print(FeatureScatter(subset(seur.human, subset=Batch==batch), feature1 = "nFeature_RNA", feature2 = "percent.mt", group.by="Batch", pt.size=0.1, jitter=T) |
FeatureScatter(subset(seur.human, subset=Batch==batch), feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="Batch", pt.size=0.1, jitter=T))
}







